def __init__(self, tokenize):
    """Collect per-class training statistics into ``self.dict``.

    Args:
        tokenize: Callable stored on the instance and handed to
            ``vocabulary`` to build the per-class counters.

    After construction ``self.dict`` holds, for each of ``"pos"`` and
    ``"neg"``:

    * ``"counter"``    -- the counter returned by ``vocabulary`` for the class
    * ``"nof_tokens"`` -- sum of all values in that counter
    * ``"log_prob"``   -- natural log of the class's share of the
      ``"train"`` documents

    plus a top-level ``"voc_size"`` entry: the number of distinct keys
    across both counters.
    """
    self.tokenize = tokenize
    feels = ("pos", "neg")

    # Number of training documents per class and overall.
    nof_docs = {feel: sum(1 for _ in docs("train", feel)) for feel in feels}
    nof_docs_total = sum(nof_docs.values())

    voc = vocabulary(self.tokenize)
    self.dict = {feel: {"counter": voc[feel]} for feel in feels}

    for feel, stats in self.dict.items():
        stats["nof_tokens"] = sum(stats["counter"].values())
        stats["log_prob"] = math.log(nof_docs[feel] / nof_docs_total)

    # "voc_size" is added only after the loop above, which iterates over
    # every key of self.dict and expects each one to be a class label.
    distinct_keys = set(self.dict["pos"]["counter"]).union(
        self.dict["neg"]["counter"]
    )
    self.dict["voc_size"] = len(distinct_keys)
def measure(classifier, dataset: str = "test", alpha: int = 1):
    """Classify every document of ``dataset`` and print a metrics table.

    Args:
        classifier: Object exposing ``is_pos(doc, alpha)``.
        dataset: Dataset name forwarded to ``docs``.
        alpha: Value forwarded unchanged to ``classifier.is_pos``.

    Returns:
        A ``(marks_pos, marks_neg)`` tuple: the ``is_pos`` results for the
        ``"pos"`` documents and for the ``"neg"`` documents, in that order.
    """

    def marks_for(feel):
        # One is_pos() verdict per document of the given class.
        return [classifier.is_pos(doc, alpha) for doc in docs(dataset, feel)]

    marks_pos = marks_for("pos")
    marks_neg = marks_for("neg")
    ms = measure_metrics(marks_pos, marks_neg)

    print(" | Precision | Recall | F-measure")
    print(f' Micro | {ms["P_micro"]:9.2f} | {ms["R_micro"]:6.2f} | {ms["F_micro"]:8.2f}')
    print(f' Macro | {ms["P_macro"]:9.2f} | {ms["R_macro"]:6.2f} | {ms["F_macro"]:8.2f}')
    print()

    return marks_pos, marks_neg
def vocabulary(tokenize):
    """Accumulate one ``Counter`` per class over the ``"train"`` documents.

    Args:
        tokenize: Callable applied to each document; its result is added to
            the class counter with ``+=`` (presumably it returns a
            ``Counter`` -- verify against the tokenizers in use).

    Returns:
        A dict with keys ``"pos"`` and ``"neg"``, each mapping to the
        accumulated ``Counter`` for that class.
    """
    vocab = {feel: Counter() for feel in ("pos", "neg")}
    for feel in vocab:
        for doc in docs("train", feel):
            # In-place Counter addition: the same Counter object is kept and
            # entries whose count is not positive are dropped.
            vocab[feel] += tokenize(doc)
    return vocab
def dump_debug():
    """Debug screen (active routes).

    Calls ``abort(400, "Access Denied")`` when ``app.settings.debug`` is
    falsy. Otherwise stores the page title and the result of
    ``utils.docs(bottle_default_app())`` in ``app.template_vars`` and
    returns ``app.template_vars``.
    """
    debug_enabled = app.settings.debug
    if not debug_enabled:
        abort(400, "Access Denied")

    template_vars = app.template_vars
    template_vars.update(
        {
            'title': 'Debug information',
            'modules': utils.docs(bottle_default_app()),
        }
    )
    return app.template_vars
def index_word_appearancefraction():
    """Count, per word, how many documents of each class contain it.

    Every document yielded by ``docs(feel=...)`` for ``"pos"`` and ``"neg"``
    is passed through ``tokens``. Document frequencies are accumulated in a
    single pass rather than storing every token set and re-scanning all of
    them once per vocabulary word.

    Returns:
        A dict with two entries:

        * ``"index"`` -- maps each word seen in any document to a tuple
          ``(nof_pos_docs_containing_word, nof_neg_docs_containing_word)``.
        * ``"total_nof_docs"`` -- tuple ``(nof_pos_docs, nof_neg_docs)``.
    """
    from collections import Counter

    feels = ("pos", "neg")
    seen_words = set()
    doc_freq = [Counter() for _ in feels]
    nof_docs = [0 for _ in feels]

    for i, feel in enumerate(feels):
        for doc in docs(feel=feel):
            toks = tokens(doc)
            # `|=` on a set requires a set-like operand, so each word occurs
            # at most once in toks and update() adds one per document.
            seen_words |= toks
            doc_freq[i].update(toks)
            nof_docs[i] += 1

    pos_freq, neg_freq = doc_freq
    return {
        # Counter lookups yield 0 for words absent from a class.
        "index": {word: (pos_freq[word], neg_freq[word]) for word in seen_words},
        "total_nof_docs": tuple(nof_docs),
    }
def send_doc_wrapper(path):
    """Render documentation for a specific API module.

    Looks ``path`` up in the mapping returned by ``utils.docs()``.

    Returns:
        ``{"title": path.title(), "docs": <entry for path>}`` when ``path``
        is present; otherwise ``abort(404, "Not Found")`` is called.
    """
    module_docs = utils.docs()
    if path not in module_docs:
        abort(404,"Not Found")
        return None  # only reached if abort() returns instead of raising
    return {"title": path.title(), "docs": module_docs[path]}
def send_doc_wrapper(path):
    """Render documentation for a specific API module.

    Looks ``path`` up in the mapping returned by ``utils.docs()``.

    Returns:
        ``{"title": path.title(), "docs": <entry for path>}`` when ``path``
        is present; otherwise ``abort(404, "Not Found")`` is called.
    """
    available = utils.docs()
    known = path in available
    if not known:
        abort(404, "Not Found")
        return None  # only reached if abort() returns instead of raising
    page = {"title": path.title(), "docs": available[path]}
    return page
def dump_debug():
    """Debug screen (active routes).

    Calls ``abort(400, "Access Denied")`` when ``app.config.debug`` is
    falsy. Otherwise stores the page title and the result of
    ``utils.docs(bottle_default_app())`` in ``app.template_vars`` and
    returns ``app.template_vars``.
    """
    debug_enabled = app.config.debug
    if not debug_enabled:
        abort(400, "Access Denied")

    template_vars = app.template_vars
    template_vars.update(
        {
            "title": "Debug information",
            "modules": utils.docs(bottle_default_app()),
        }
    )
    return app.template_vars