def main():
    """Command-line entry point: parse the arguments and run one sub-command.

    Global flags:
        --verbose   log at INFO instead of ERROR (messages go to stdout).
        --progress  forwarded to the training / validation helpers.

    Sub-commands:
        train     call ``train_from_bottom`` and pickle its three return
                  values as a list into ``blaecksprutte.db``.
        tag       call ``tag_new_mails`` with that file.
        validate  call ``validate``.

    ``blaecksprutte.db`` lives in the directory reported by
    ``Database.get_path()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose",
                        help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress",
                        help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help="show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    args = parser.parse_args()

    # The database is opened only to learn its path; release the handle even
    # when get_path() raises.
    db = Database()
    try:
        path = db.get_path()
    finally:
        db.close()
    filename = os.path.join(path, "blaecksprutte.db")

    warnings.simplefilter('ignore', UndefinedMetricWarning)

    # Route log records to stdout as bare messages; --verbose lowers the
    # threshold from ERROR to INFO on both the handler and the logger.
    level = logging.INFO if args.verbose else logging.ERROR
    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)

    # The sub-commands are mutually exclusive, so dispatch with elif.
    if args.command == 'train':
        v, b, c = train_from_bottom(log, args.progress)
        with open(filename, 'wb') as f:
            pickle.dump([v, b, c], f, pickle.HIGHEST_PROTOCOL)
    elif args.command == 'tag':
        tag_new_mails(filename, log)
    elif args.command == 'validate':
        validate(log, args.progress)
def cmd_new(self):
    """Run 'notmuch new'.

    Opens the database in read-write mode, asks it for its directory and
    prints whatever ``self._add_new_files_recursively`` returns for that
    directory.
    """
    # Get the database directory
    db = Database(mode=Database.MODE.READ_WRITE)
    path = db.get_path()
    # The parenthesised single-argument form is valid both as a Python 2
    # print statement and as a Python 3 print() call, with identical output.
    print(self._add_new_files_recursively(path, db))
def cmd_new(self):
    """Run 'notmuch new'.

    Opens the database in read-write mode, asks it for its directory and
    prints whatever ``self._add_new_files_recursively`` returns for that
    directory.
    """
    # Get the database directory
    db = Database(mode=Database.MODE.READ_WRITE)
    path = db.get_path()
    # The parenthesised single-argument form is valid both as a Python 2
    # print statement and as a Python 3 print() call, with identical output.
    print(self._add_new_files_recursively(path, db))
def main():
    """Command-line entry point: parse the arguments and run one sub-command.

    Global flags:
        --verbose   log at INFO instead of ERROR (messages go to stdout).
        --progress  forwarded to the train / validate / optimize helpers.

    Sub-commands:
        train     call ``train`` with the pipeline and model file names.
        tag       call ``tag_new_mails`` with the model file; when that file
                  does not exist yet, ``train`` is run first.
        validate  call ``validate`` with the pipeline file name.
        optimize  call ``optimize`` with the pipeline file name.

    The model file (``blaecksprutte.db``) and the pipeline file
    (``best_pipeline.db``) live in the directory reported by
    ``Database.get_path()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose",
                        help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress",
                        help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help="show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    subparsers.add_parser(
        "optimize",
        help="perform a grid search with 60 different possible hyperparameters to find the best ones"
    )
    args = parser.parse_args()

    # The database is opened only to learn its path; release the handle even
    # when get_path() raises.
    db = Database()
    try:
        path = db.get_path()
    finally:
        db.close()
    model_filename = os.path.join(path, "blaecksprutte.db")
    pipeline_filename = os.path.join(path, "best_pipeline.db")

    for category in (UndefinedMetricWarning, FutureWarning, UserWarning):
        warnings.simplefilter('ignore', category)

    # Route log records to stdout as bare messages; --verbose lowers the
    # threshold from ERROR to INFO on both the handler and the logger.
    level = logging.INFO if args.verbose else logging.ERROR
    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)

    # The sub-commands are mutually exclusive, so dispatch with elif.
    if args.command == 'train':
        train(log, pipeline_filename, model_filename, args.progress)
    elif args.command == 'tag':
        if not os.path.isfile(model_filename):
            # Logger.warn is a deprecated alias that newer Python versions
            # no longer provide; Logger.warning is the supported spelling.
            log.warning(
                "no existing model file found: training model. This may take some time!"
            )
            train(log, pipeline_filename, model_filename, args.progress)
        tag_new_mails(model_filename, log)
    elif args.command == 'validate':
        validate(log, pipeline_filename, args.progress)
    elif args.command == 'optimize':
        optimize(log, pipeline_filename, args.progress)